#--------------------------------------------------#
#   Academic freedom and the onset of autocratization   #
#--------------------------------------------------#

# Title: "Academic freedom and the onset of autocratization" #
# Authors: "Pelke, Lars", FAU Erlangen-Nürnberg
# date: 2023-04-24
# journal: Democratization
# DOI: 10.1080/13510347.2023.2207213
# written under "R version 4.2.3 (2023-03-15)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# set working directory
#setwd("P:/PIPM/7. VWS Projekt Academic Freedom Index/replication_files_autocratization_and_academic_freedom")

# loading packages

library(countrycode)
library(tidyverse)
library(scales)
library(lavaan)


#### Import Data ####

#Load World value survey data

wvsdata <- readRDS("data/WVS/WVS_TimeSeries_1981_2022_Rds_v3_0.rds")
head(wvsdata)

#### Reduce Datasets ####

wvsdata<- wvsdata %>%
  dplyr::select(X001, X002, X003, X011, X025R, X028, X047_WVS, X045, E114, E115, E116, E117, S020, 
         S003, S025, S002VS, X023, X023R, 
         X025, X025A_01) 
# X001 = sex; X002 = year of birth; X003 = Age; X011 = number of children; X025R = education level; 
# X028 = employement status; X045 = social class; X047_WVS = scale of income; 
# E114 = strong leader; E115 = having experts making decision; E116 = having army rule; E117 = having a democratic political system
# S020 = year of survey ; S003 = country_code; S025 = country-year code; S002VS = survey round
# X023 = age of completed education; X023R = age of completed education WVS 7
  
summary(wvsdata)

#### Prepare WVS Variables for Empirical Analysis ####

wvsdata <- wvsdata %>%
  mutate(sex = ifelse(X001==1, 1,
                      ifelse(X001==2, 0, NA)))
table(wvsdata$sex)
sum(is.na(wvsdata$sex))

wvsdata <- wvsdata %>%
  mutate(age= X003) %>%
  mutate(age = ifelse(X003<0, NA, age))
summary(wvsdata$age)
sum(is.na(wvsdata$age))

wvsdata <- wvsdata %>%
  mutate(education= X025) %>%
  mutate(education = ifelse(X025<0, NA, education))
summary(wvsdata$education)
sum(is.na(wvsdata$education))

wvsdata_wave7 <- wvsdata %>%
  filter(S002VS == 7)

wvsdata <- wvsdata %>%
  filter(S002VS != 7 ) %>%
  mutate(graduate = ifelse(education >=8, 1, 0)) %>%
  dplyr::select(-education)

wvsdata_wave7 <- wvsdata_wave7 %>%
  mutate(education= X025A_01) %>%
  mutate(education = ifelse(X025A_01<0, NA, education)) %>%
  mutate(graduate = ifelse(education >=5, 1, 0)) %>%
  dplyr::select(-education)

summary(wvsdata_wave7$graduate)
sum(is.na(wvsdata_wave7$graduate))

wvsdata <- wvsdata %>%
  bind_rows(wvsdata_wave7)

rm(wvsdata_wave7)

summary(wvsdata$graduate)
summary(wvsdata$X025R)

table(wvsdata$graduate)
table(wvsdata$X025R)


wvsdata <- wvsdata %>%
  mutate(birth_year = ifelse(X002<0, NA, X002))
summary(wvsdata$birth_year)
sum(is.na(wvsdata$birth_year))

wvsdata <- wvsdata %>%
  mutate(income_deciles= X047_WVS) %>%
  mutate(income_deciles = ifelse(X047_WVS<0, NA, income_deciles))
summary(wvsdata$income_deciles)
sum(is.na(wvsdata$income_deciles))

wvsdata <- wvsdata %>%
  mutate(income_quintiles = ifelse(income_deciles==1, 1,
                                   ifelse(income_deciles==2, 1,
                                          ifelse(income_deciles==3, 2,
                                                 ifelse(income_deciles==4, 2,
                                                        ifelse(income_deciles==5, 3,
                                                               ifelse(income_deciles==6, 3,
                                                                      ifelse(income_deciles==7, 4,
                                                                             ifelse(income_deciles==8, 4,
                                                                                    ifelse(income_deciles==9, 5,
                                                                                           ifelse(income_deciles==10, 5, X047_WVS))))))))))) %>%
  mutate(income_quintiles = ifelse(X047_WVS<0, NA, income_quintiles))
summary(wvsdata$income_quintiles)
sum(is.na(wvsdata$income_quintiles))

wvsdata <- wvsdata %>%
  mutate(social_class = X045) %>%
  mutate(social_class = ifelse(X045<0, NA, social_class))
summary(wvsdata$social_class)
sum(is.na(wvsdata$social_class))

wvsdata <- wvsdata %>%
  mutate(children= ifelse(X011>0, 1,
                          ifelse(X011==0, 0, X011))) %>%
  mutate(children = ifelse(X011<0, NA, children))
summary(wvsdata$children)
sum(is.na(wvsdata$children))

wvsdata <- wvsdata %>%
  mutate(unemployed= ifelse(X028==7, 1,
                            ifelse(X028>=1 & X028<7, 0,
                                   ifelse(X028==8, 0, X028)))) %>%
  mutate(unemployed = ifelse(X028<0, NA, unemployed))
summary(wvsdata$unemployed)
sum(is.na(wvsdata$unemployed))

#### Country Codes to World Values Survey Country Items ####

wvsdata$iso3n <- countrycode(wvsdata$S003, "wvs", "iso3n", warn = TRUE)
wvsdata %>% dplyr::filter(S003 %in% c(70, 446, 499, 688)) # 9469  observations in which it is not clear which country
wvsdata$country_name <- countrycode(wvsdata$S003, "wvs", "country.name", warn = TRUE)

wvsdata <- wvsdata %>%
  drop_na(country_name)

wvsdata$year <- wvsdata$S020

wvsdata$wave <- wvsdata$S002VS

head(wvsdata$country_name)

#### Mutating Dependent Variables ####

## reverse some ordinary scale for different variables ##

wvsdata <- wvsdata %>%
  mutate(strong_leader = E114, 
         strong_leader = ifelse(E114 <0, NA, strong_leader)) %>% # strong leader from good 1 to bad 4
  mutate(expert_gov = E115, 
         expert_gov = ifelse(E115 <0, NA, expert_gov)) %>% # experts instead of governments from good 1 to bad 4)
  mutate(army_rule = E116, 
         army_rule = ifelse(E116 <0, NA, army_rule)) %>% # army rule from good 1 to bad 4)
  mutate(demo_system = E117, 
         demo_system = ifelse(E117 <0, NA, demo_system), 
         demo_system = ifelse(E117 == 4, 1, 
                              ifelse(E117 == 3, 2, 
                                     ifelse(E117 == 2, 3, 
                                          ifelse(E117 == 1, 4, demo_system)))))  # having a democratic system from bad 1 to good 4 after rescaling)
  
table(wvsdata$demo_system)  

#Democratic values 
#Alpha of 0.48

table(wvsdata$strong_leader)
table(wvsdata$expert_gov)
table(wvsdata$army_rule)
table(wvsdata$demo_system)

psych::alpha(wvsdata[c("strong_leader","expert_gov","army_rule","demo_system")], check.keys = T)


## Traditional Factor Model (CFA) ####

## Fit model 1-dim model
dim1 <- cfa('democratic_values =~ strong_leader + expert_gov + army_rule + demo_system',
            data=wvsdata, std.lv=TRUE, missing="ML")

## Loadings are Std.all column for f, Uniquenesses are Std.all columnv for Variances
## Reports TLI, RMSEA, and SRMR
summary(dim1, fit.measures=TRUE, standardized=TRUE)

## And again, saving output
sink("results/Confimatory_factor_analysis.txt")

## Loadings are Std.all column for f, Uniquenesses are Std.all columnv for Variances
## Reports TLI, RMSEA, and SRMR
## Note variables are in a different order than they are in the table in the appendix
print(summary(dim1, fit.measures=TRUE, standardized=TRUE))
sink()

head(lavPredict(dim1))

## merge factor scores to original data.frame

idx <- lavInspect(dim1, "case.idx")
fscores <- lavPredict(dim1)
## loop over factors
for (fs in colnames(fscores)) {
  wvsdata[idx, fs] <- fscores[ , fs]
}
head(wvsdata)

rm(dim1, fscores, idx, fs)

summary(wvsdata$democratic_values)

wvsdata$democratic_values_normalized <- rescale(wvsdata$democratic_values, to = c(0, 1), from = range(wvsdata$democratic_values, na.rm = TRUE, finite = TRUE))

summary(wvsdata$democratic_values_normalized)

#### Age and Cohorts WVS ####

## cohort of respondents ##

wvsdata <- wvsdata %>%
  mutate(cohort = birth_year)
summary(wvsdata$birth_year)


wvsdata <- wvsdata %>%
  mutate(cohort_5 = cut(wvsdata$cohort, seq(1885, 2020, by = 5), right = F, labels = c(1885, 1890, 1895, 1900, 1905,
                                                                                       1910, 1915, 1920, 1925, 1930, 
                                                                                       1935, 1940, 1945, 1950, 1955,
                                                                                       1960, 1965, 1970, 1975, 1980,
                                                                                       1985, 1990, 1995, 2000, 2005, 
                                                                                       2010, 2015)
  )) 

wvsdata <- wvsdata %>%
  mutate(cohort_10 = cut(wvsdata$cohort, seq(1885, 2015, by = 10), right = F, labels = c(1885, 1895, 1905,
                                                                                        1915, 1925,  
                                                                                        1935, 1945, 1955,
                                                                                        1965, 1975, 
                                                                                        1985, 1995, 2005)
  )) 

wvsdata$cohort_5 <- as.numeric(as.character(wvsdata$cohort_5))
wvsdata$cohort_10 <- as.numeric(as.character(wvsdata$cohort_10))


wvsdata <- wvsdata %>%
  mutate(cohortmatch5_20 = cohort_5 + 20) # 5-year cohorts year plus 25 years as university education typically occur between 15 and 25 years old

wvsdata <- wvsdata %>%
  mutate(cohortmatch10_20 = cohort_10+ 20) # 5-year cohorts year plus 20 years as university education typically occur between 15 and 25 years old


table(wvsdata$cohort_5)
table(wvsdata$cohortmatch5_20)

#### Generate Dataset ID ####

wvsdata <- wvsdata %>%
  mutate(data = "WVS")
table(wvsdata$data)

#### SAVE DATASET ####

wvsdata_subset <- wvsdata %>%
  dplyr::select(data, iso3n, country_name, year, wave, sex, age, graduate, birth_year, income_deciles, 
                income_quintiles, social_class, children, unemployed, strong_leader, expert_gov, army_rule, demo_system, 
                democratic_values,cohort_5, cohort_10, cohortmatch5_20, cohortmatch10_20, democratic_values_normalized)

saveRDS(wvsdata_subset, file = "data/wvsdata_prepared.rds")





